NKG2a+/- CD8+ T-cells

In this notebook, we will

  • define NKG2a (KLRC1) positive and negative clusters of CD8+ T-cells
  • Prepare R objects for differential expression analysis with edgeR
  • Generate plots with marker genes of interest

Input data

import scanpy as sc
            from matplotlib import colors, rcParams
            import matplotlib.pyplot as plt
            import re
            
            sc.settings.set_figure_params(dpi_save=600, figsize=(4, 4), vector_friendly=True)
            import anndata2ri
            from rpy2.robjects import r
%load_ext rpy2.ipython
anndata2ri.activate()
_ = r(
                """
            library(dplyr)
            library(edgeR)
            """
            )
R[write to console]: 
            Attaching package: ‘dplyr’


            
R[write to console]: The following objects are masked from ‘package:stats’:

                filter, lag


            
R[write to console]: The following objects are masked from ‘package:base’:

                intersect, setdiff, setequal, union


            
R[write to console]: Loading required package: limma

            
input_file = "../results/05_prepare_adata_nk_t/adata.h5ad"
            output_dir = "tmp/"
# Parameters
            input_file = "input_adata.h5ad"
            output_dir = "."
adata = sc.read_h5ad(input_file)

Prepare for DE analysis (CD8+ NKG2a+ vs CD8+ NKG2a-)

sc.pl.umap(adata, color=["cell_type", "cluster"])

adata_de = adata[adata.obs["cell_type"] == "T CD8+", :].copy()
sc.pl.umap(adata_de, color="cluster")

adata_de.obs["nkg2a_status"] = [
                "pos" if clus in ["T CD8+ 3", "T CD8+ 5", "T CD8+ 10"] else "neg"
                for clus in adata_de.obs["cluster"]
            ]
sc.pl.umap(
                adata_de,
                color=["KLRC1", "nkg2a_status"],
                size=10,
            )
... storing 'nkg2a_status' as categorical
            

obs = adata_de.obs.loc[
                :, ["nkg2a_status", "mt_frac", "n_genes"]
            ]
            gene_symbols = adata_de.var_names
            counts = adata_de.X.T.toarray()

Prepare R objects for edgeR

%Rpush counts
            %Rpush gene_symbols
            %Rpush obs
            %Rpush output_dir
_ = r(
                """
            rownames(counts) = gene_symbols
            colnames(counts) = rownames(obs)
            """
            )
r(
                """
            gen_nkg2a = function(column, filename) {
                # naming convention, there needs to be tmp_obs and tmp_counts for the downstream script. 
                tmp_obs = obs
                tmp_counts = counts
                formula = paste0("~ 0 + ", column, " + n_genes + mt_frac")
                contrast = paste0(column, "pos-", column, "neg")
                design = model.matrix(as.formula(formula), data=tmp_obs)
                contrasts = makeContrasts(contrasts=contrast, levels=colnames(design))
                print(head(contrasts))
                save(tmp_obs, tmp_counts, design, contrasts, file=file.path(output_dir, filename), compress=FALSE)
            }
            """
            )
R object with classes: ('function',) mapped to:
r(
                """
            gen_nkg2a("nkg2a_status", "de_nkg2a_status.rda")
            """
            )
                 Contrasts
            
Levels           
 nkg2a_statuspos-nkg2a_statusneg

              nkg2a_statusneg
                              -1

              nkg2a_statuspos
                               1

              n_genes        
                               0

              mt_frac        
                               0

            
<rpy2.rinterface_lib.sexp.NULLType object at 0x2b471f116e00> [RTYPES.NILSXP]

Dotplots and UMAP plots

genes_of_interest = [
                "KLRC1",
                "HAVCR2",
                "ENTPD1",
                "LAG3",
                "PDCD1",
                "TIGIT",
                "KLRC2",
                "KLRK1",
                "CD226",  # (DNAM-1),
                "CD244",  # (2B4),
                "IL2RB",  # (CD122),
                "ITGA1",  # (CD49a)]
            ]
genes_of_interest2 = ["CD4", "CD8A", "CD8B", "FOXP3", "CD3D", "CD3E", "CD3G", "NCAM1"]
sc.pl.dotplot(
                adata,
                var_names=genes_of_interest,
                groupby="cluster",
                swap_axes=True,
                save="nkg2a.pdf",
            )
WARNING: saving figure to file figures/dotplot_nkg2a.pdf
            

fig, ax = plt.subplots(figsize=(10, 10))
            sc.pl.umap(
                adata,
                color="cluster",
                legend_loc="on data",
                legend_fontoutline=3,
                ax=ax,
                size=80,
                legend_fontsize=11,
            )

fig, ax = plt.subplots(figsize=(10, 10))
            sc.pl.umap(
                adata, color="cell_type", legend_fontoutline=3, ax=ax, size=80, legend_fontsize=11
            )

sc.pl.umap(
                adata,
                color=genes_of_interest,
                ncols=4,
                color_map="YlOrRd",
                save="_nkg2a.pdf",
                size=20,
            )
WARNING: saving figure to file figures/umap_nkg2a.pdf
            

sc.pl.umap(
                adata,
                color=genes_of_interest2,
                ncols=4,
                color_map="YlOrRd",
                save="_nkg2a_2.pdf",
                size=20,
            )
WARNING: saving figure to file figures/umap_nkg2a_2.pdf